Note
Go to the end to download the full example code.
8.1 Using KAgents
Here we will show how to instantiate and use any of the Kernel Reinforcement Learning algorithms. Most of the code is similar to what you would find in standard RL libraries. The specific parts are commented and explained below.
start game
Computed global error Bellman mean: 7.89555062855977e-07 iter: 1
Game 1
start game
Computed global error Bellman mean: 1.9174925518446647e-06 iter: 5
Game 2
start game
Computed global error Bellman mean: 0.004522472092035111 iter: 5
Game 3
start game
Computed global error Bellman mean: 0.19431530240631453 iter: 2
Game 4
start game
Computed global error Bellman mean: 4.7390296076303594e-07 iter: 5
Game 5
start game
Computed global error Bellman mean: 2.1349410692526907 iter: 1
Game 6
start game
Computed global error Bellman mean: 0.9118957326956381 iter: 5
Game 7
start game
Computed global error Bellman mean: 0.447413797743634 iter: 5
Game 8
start game
Computed global error Bellman mean: 0.9093983748700462 iter: 5
Game 9
start game
Computed global error Bellman mean: 0.9353377222334376 iter: 5
Game 10
start game
Computed global error Bellman mean: 0.4486237852201142 iter: 5
Game 11
start game
Computed global error Bellman mean: 0.5199007140480226 iter: 5
Game 12
start game
Computed global error Bellman mean: 0.028420690962717533 iter: 5
Game 13
start game
Computed global error Bellman mean: 0.3258012670198149 iter: 5
Game 14
start game
Computed global error Bellman mean: 0.12585790268695965 iter: 5
Game 15
<IPython.core.display.Video object>
# Importing necessary modules
import codpy.KQLearning as KQLearning
import gymnasium as gym
import imageio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
# Create the CartPole environment; "rgb_array" mode lets us capture frames
# for the video rendered at the end of the example.
env = gym.make("CartPole-v1", render_mode="rgb_array")
frames = []

# The agent requires action and observation space dimensions
agent = KQLearning.KQLearning(
    env.action_space.n, env.observation_space.shape[0], gamma=0.99
)

games = 0
while games < 15:
    print("start game")
    # Store the game history for training
    states, actions, next_states, rewards, dones = [], [], [], [], []
    state, _ = env.reset()
    steps = 0
    while steps < 1000:
        action = agent(state)
        # NOTE(review): only the `terminated` flag is used here; the
        # `truncated` flag is discarded — confirm this is intended.
        next_state, reward, done, _, _ = env.step(action)
        frames.append(env.render())
        # Record transitions in play order; the lists are reversed once per
        # episode below, which is O(n) instead of the O(n^2) total cost of
        # calling list.insert(0, ...) on every step.
        states.append(state)
        actions.append(action)
        next_states.append(next_state)
        rewards.append(reward)
        dones.append(done)
        steps += 1
        state = next_state
        if done:
            break
    # The agent expects the game to be passed in reverse order
    states.reverse()
    actions.reverse()
    next_states.reverse()
    rewards.reverse()
    dones.reverse()
    # You train your agent once at the end of every episode
    agent.train((states, actions, next_states, rewards, dones))
    games += 1
    print(f"Game {games}")
env.close()
# imageio.mimsave("cartpole.gif", frames, fps=30)
from IPython.display import HTML, Video, display

# HTML('<img src="cartpole.gif" style="max-width: 100%; height: auto;">')

# Build a matplotlib animation from the captured frames and save it as mp4.
fig = plt.figure()
plt.axis("off")
im = plt.imshow(frames[0])

def _draw(frame):
    # Swap the rendered frame into the image artist; return the artist
    # tuple so blitting redraws only what changed.
    im.set_array(frame)
    return (im,)

ani = animation.FuncAnimation(
    fig,
    _draw,
    frames=frames,   # the list of captured frames
    interval=50,     # delay between frames (ms)
    blit=True,       # optimize rendering
)
ani.save("cartpole.mp4", writer="ffmpeg", fps=30, dpi=100)
display(Video("cartpole.mp4", embed=True))
# plt.show()
Total running time of the script: (1 minutes 29.548 seconds)